library(ggplot2)
library(dplyr)
library(reshape2)

load("main_data_local.rdata")
load("main_data_parl.rdata")

#create function for splitting income residuals in brackets

stacker <- 
  function(x){
    stack <- NA
    stack[x >=-3.25] <- -3
    stack[x > -2.75] <- -2.5
    stack[x > -2.25] <- -2
    stack[x > -1.75] <- -1.5
    stack[x > -1.25] <- -1
    stack[x > - .75] <- -.5
    stack[x > - .25] <- 0
    stack[x >   .25] <- .5
    stack[x >   .75] <- 1
    stack[x >  1.25] <- 1.5
    stack[x >  1.75] <- 2
    stack[x >  2.25] <- 2.5
    stack[x >  2.75] <- 3
    return(stack)
  }


# create bars per year for local elections  

# create empty data frame
bar_data <- data_frame()

for(k in seq(1993, 2013, by = 4)){
  # remove bottom 0.1% and top 0.1% to avoid huge outliers. 
  # rescale income residual 
  # remove abs(z) > 3.25
  
  data_an <- 
    data_comp_local %>% 
    filter(four_years == k) %>% 
    filter(in_year == 1) %>% 
    filter(inc_res < quantile(inc_res, p = 0.999) &
             inc_res > quantile(inc_res, p = 0.001) ) %>% 
    mutate(inc_res2 = (inc_res - mean(inc_res)) / sd(inc_res)) %>%
    filter(abs(inc_res2) <= 3.25) %>% 
    mutate(inc_res_bars = stacker(inc_res2))
  
  # group by z categories for population, running candidates, and elected candidates 
  plot_data <- 
    data_frame(z = seq(-3, 3, 0.5), 
               year = k) %>%
    left_join(., data_an %>% 
                group_by(inc_res_bars) %>% 
                summarize(population_margin = n()), 
              by =  c("z" = "inc_res_bars")) %>% 
    left_join(., data_an %>%
                filter(run_kv == 1) %>%
                group_by(inc_res_bars) %>% 
                summarize(candidate_margin = n()), 
              by =  c("z" = "inc_res_bars")) %>% 
    left_join(., data_an %>%
                filter(VALGT_JN == 1) %>%
                group_by(inc_res_bars) %>% 
                summarize(elected_margin = n()), 
              by =  c("z" = "inc_res_bars"))
  
  bar_data <-
    bar_data %>%
    bind_rows(plot_data)  
  
  print(k)
}


# Compile national election data ------------------------------------------
bar_data_fv <- data_frame()

for(k in c(1990, 1994, 1998, 2001, 2005, 2007, 2011, 2015)){
  # remove bottom 0.1% and top 0.1% to avoid huge outliers. 
  # rescale income residual 
  # remove abs(z) > 3.25
  
  data_an <- 
    data_comp_parl %>% 
    filter(four_years == k) %>% 
    filter(in_year == 1) %>% 
    filter(inc_res < quantile(inc_res, p = 0.999) &
             inc_res > quantile(inc_res, p = 0.001) ) %>% 
    mutate(inc_res2 = (inc_res - mean(inc_res)) / sd(inc_res)) %>%
    filter(abs(inc_res2) <= 3.25) %>% 
    mutate(inc_res_bars = stacker(inc_res2))
  
  # group by z categories for population, running candidates, and elected candidates 
  plot_data <- 
    data_frame(z = seq(-3, 3, 0.5), 
               year = k) %>%
    left_join(., data_an %>% 
                group_by(inc_res_bars) %>% 
                summarize(population_margin = n()), 
              by =  c("z" = "inc_res_bars")) %>% 
    left_join(., data_an %>%
                filter(run_fv == 1) %>%
                group_by(inc_res_bars) %>% 
                summarize(parl_candidate_margin = n()), 
              by =  c("z" = "inc_res_bars")) %>% 
    left_join(., data_an %>%
                filter(VALGT_JN == 1) %>%
                group_by(inc_res_bars) %>% 
                summarize(parl_elected_margin = n()), 
              by =  c("z" = "inc_res_bars"))
  

  bar_data_fv <-
    bar_data_fv %>%
    bind_rows(plot_data)  
  
  print(k)
}


# create plots 

bar_data <-
  as.data.frame(bar_data)

bar_data2 <- 
  melt(bar_data, 
       id = c("z", "year")) %>%
  group_by(z, variable) %>% 
  summarise(count = sum(value)) 

# change missing zs to zero. 
bar_data_fv <-
  as.data.frame(bar_data_fv) %>% 
  mutate(parl_candidate_margin = ifelse(is.na(parl_candidate_margin), 0, parl_candidate_margin),
         parl_elected_margin   = ifelse(is.na(parl_elected_margin)  , 0, parl_elected_margin  ))

# subtract 2001 and 2005 from population margin in order not to count them twice

bar_data2_fv <- 
  bar_data_fv %>% 
  mutate(population_margin = ifelse(year == 2001 | year == 2005, 0, population_margin)) %>%
  melt(., 
       id = c("z", "year")) %>%
  group_by(z, variable) %>% 
  summarise(count = sum(value)) 

suppressWarnings({
  bar_data2 <- 
    bind_rows(bar_data2, bar_data2_fv) %>% 
    group_by(z, variable) %>% 
    summarise(count = sum(count)) 
})

bar_data_count <- 
  bar_data2 %>% 
  group_by(variable) %>% 
  summarise(total =  sum(count))

bar_data2 <- 
  left_join(bar_data2, bar_data_count, by = "variable") %>% 
  mutate(share = count / total)

plot <-
   ggplot(data = bar_data2 %>% filter(variable != "population_margin"),
          aes(x = z, 
              y = share, 
              alpha = variable) ) +
  geom_bar(stat = "identity", position = "dodge" ) +
  geom_bar(data = bar_data2 %>% filter(variable == "population_margin"),
         aes(x = z, 
             y = share), 
         stat = "identity", fill = "white", colour = "black", alpha = 1/10 ) +
  theme_classic() + 
  scale_alpha_discrete(range = c(0.2, 1), "", 
                       labels = c("Running for municipality", 
                                  "Elected for municipality",
                                  "Running for parliament", 
                                  "Elected for parliament")) + 
  scale_x_continuous(breaks = -3:3) + 
  scale_y_continuous("") 



###########################################################################
# Test for equality of means  ---------------------------------------------
###########################################################################

ttest_data <-
  tibble()

for(k in seq(1993, 2013, by = 4)){
  # remove bottom 0.1% and top 0.1% to avoid huge outliers. 
  # rescale income residual 
  # remove abs(z) > 3.25
  
  data_an <- 
    data_comp_local %>% 
    filter(four_years == k) %>% 
    filter(in_year == 1) %>% 
    filter(inc_res < quantile(inc_res, p = 0.999) &
             inc_res > quantile(inc_res, p = 0.001) ) %>% 
    mutate(inc_res2 = (inc_res - mean(inc_res)) / sd(inc_res)) %>%
    filter(abs(inc_res2) <= 3.25) 
  
  # test
  
  ttest_data_k <- 
    data_an %>% 
    filter(is.na(VALGT_JN)) %>% 
    summarise(mean = mean(inc_res), se = sqrt(var(inc_res)/length(inc_res))) %>% 
    mutate(year = k,
           level = "Local council",
           place = 3,
           comparison = "Population") %>% 
    bind_rows(data_an %>% 
                filter(!is.na(VALGT_JN)) %>% 
                summarise(mean = mean(inc_res), se = sqrt(var(inc_res)/length(inc_res))) %>% 
                mutate(year = k,
                       level = "Local council",
                       place = 2,
                       comparison = "Candidates")) %>%
    bind_rows(data_an %>% 
                filter(VALGT_JN == 1) %>% 
                summarise(mean = mean(inc_res), se = sqrt(var(inc_res)/length(inc_res))) %>% 
                mutate(year = k,
                       level = "Local council",
                       place = 1,
                       comparison = "Electees"))
  
  ttest_data <-
    ttest_data %>%
    bind_rows(ttest_data_k)  
  
  print(k)
}

# test for parliamentary candidates 

for(k in c(1990, 1994, 1998, 2001, 2005, 2007, 2011, 2015)){
  # remove bottom 0.1% and top 0.1% to avoid huge outliers. 
  # rescale income residual 
  # remove abs(z) > 3.25
  
  data_an <- 
    data_comp_parl %>% 
    filter(four_years == k) %>% 
    filter(in_year == 1) %>%
    filter(inc_res < quantile(inc_res, p = 0.999) &
             inc_res > quantile(inc_res, p = 0.001) ) %>% 
    mutate(inc_res2 = (inc_res - mean(inc_res)) / sd(inc_res)) %>%
    filter(abs(inc_res2) <= 3.25)
  
  # test
  
  ttest_data_k <- 
    data_an %>% 
    filter(is.na(VALGT_JN)) %>% 
    summarise(mean = mean(inc_res), se = sqrt(var(inc_res)/length(inc_res))) %>% 
    mutate(year = k,
           level = "Parliament",
           place = 3,
           comparison = "Population") %>% 
    bind_rows(data_an %>% 
                filter(!is.na(VALGT_JN)) %>% 
                summarise(mean = mean(inc_res), se = sqrt(var(inc_res)/length(inc_res))) %>% 
                mutate(year = k,
                       level = "Parliament",
                       place = 2,
                       comparison = "Candidates")) %>%
    bind_rows(data_an %>% 
                filter(VALGT_JN == 1) %>% 
                summarise(mean = mean(inc_res), se = sqrt(var(inc_res)/length(inc_res))) %>% 
                mutate(year = k,
                       level = "Parliament",
                       place = 1,
                       comparison = "Electees"))
  
  ttest_data <-
    ttest_data %>%
    bind_rows(ttest_data_k)  
  
  print(k)
}

# add variable for shading in plot 
ttest_data <- 
  left_join(ttest_data, 
            ttest_data %>% 
              group_by(year) %>% 
              filter(comparison == "Population") %>% 
              mutate(xmin = mean + qnorm(0.085) * se,
                     xmax = mean + qnorm(0.915) * se) %>%
              select(c("year", "level", "xmin", "xmax")),
            by = c("year", "level"))

plot_ttest_local <- 
  ggplot(data = ttest_data %>% filter(level == "Local council"),
         aes(x    = mean,
             xmin = mean + qnorm(0.025) * se,
             xmax = mean + qnorm(0.975) * se,
             y    = place)) +
  geom_errorbarh(height = 0, size = 1) +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = mean + qnorm(0.085) * se,
                     xmax = mean + qnorm(0.915) * se,
                     y    = place), height = 0, size = 1.5) +
  facet_wrap(. ~ year, ncol = 3) +
  scale_y_continuous("", breaks = 1:3, labels = unique(ttest_data$comparison)[3:1]) +
  scale_x_continuous("") +
  theme_classic() + 
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf))

last_plot()

plot_ttest_national <- 
  ggplot(data = ttest_data %>% filter(level == "Parliament"),
         aes(x    = mean,
             xmin = mean + qnorm(0.025) * se,
             xmax = mean + qnorm(0.975) * se,
             y    = place)) +
  geom_errorbarh(height = 0, size = 1) +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = mean + qnorm(0.085) * se,
                     xmax = mean + qnorm(0.915) * se,
                     y    = place), height = 0, size = 1.5) +
  facet_wrap(. ~ year, ncol = 3) +
  scale_y_continuous("", breaks = 1:3, labels = unique(ttest_data$comparison)[3:1]) +
  scale_x_continuous("") +
  theme_classic() + 
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf))
